# -------------------------------------------------------------------------------
# Description:  
# Reference:
# Name:   Cnkicrawl
# Author: wujunchao
# Date:   2021/10/1
# -------------------------------------------------------------------------------
import csv
import time
from selenium import webdriver

# Crawl the keyword chart of every CNKI journal listed in CnkiLinks.csv.
#
# Input : CnkiLinks.csv   - one row per journal: column 0 = name, column 1 = URL.
# Output: CnkiWords.txt   - for each crawled journal, one line with its name
#                           followed by one line of comma-separated keywords.
#
# Rows whose URL does not contain "https://navi.cnki.net/" are skipped, as are
# rows with fewer than two columns (e.g. blank lines). Any Selenium failure for
# a journal still aborts the script, but the browser and both files are
# released first.
#
# NOTE(review): both files are opened with the platform default encoding, as
# before; confirm that this matches how CnkiLinks.csv was saved.

# Absolute XPath of the tab link that is clicked once the journal page loaded.
TAB_XPATH = "/html/body/div[2]/div[2]/div[3]/div[1]/ul[1]/li[3]/a"
# Absolute XPath of the SVG <text> nodes that are read as keywords; a 1-based
# positional predicate "[i]" is appended to select a single node.
KEYWORD_XPATH = (
    "/html/body/div[2]/div[2]/div[3]/div[4]/div/div[1]/div[2]/div/div[2]/div"
    "/*[name()='svg']/*[name()='g'][6]/*[name()='text']"
)

# The output file is opened once, before the loop: reopening it in a
# truncating mode per journal kept only the last journal's keywords.
with open('CnkiLinks.csv', 'r', newline='') as links_file, \
        open("CnkiWords.txt", "w") as words_file:
    for item in csv.reader(links_file):
        # Skip blank or malformed rows instead of failing with IndexError.
        if len(item) < 2:
            continue
        name = item[0]
        link = item[1]
        if "https://navi.cnki.net/" not in link:
            continue

        wd = webdriver.Firefox()
        try:
            # Open the journal page and switch to the target tab.
            wd.get(link)
            time.sleep(5)
            # find_element("xpath", ...) is available in Selenium 3 and 4;
            # the find_element_by_* helpers were removed in Selenium 4.3.
            wd.find_element("xpath", TAB_XPATH).click()
            time.sleep(5)
            # Scroll far down the page, then wait before reading the chart.
            js = "var q = document.documentElement.scrollTop=30000"
            wd.execute_script(js)
            time.sleep(5)
            # Collect the text of keyword nodes 1..19 and join them with
            # commas (no leading separator).
            words = ",".join(
                wd.find_element("xpath", KEYWORD_XPATH + "[" + str(i) + "]").text
                for i in range(1, 20)
            )
        finally:
            # Always end the browser session, even when a lookup failed.
            wd.quit()

        # Write the journal name and its keywords on two consecutive lines.
        words_file.write(name)
        words_file.write("\n")
        words_file.write(words)
        words_file.write("\n")
        # Flush so results already gathered are on disk if a later journal fails.
        words_file.flush()

